## Replication Code for
## "Education and Social Capital"
## Apfeld, Coman, Gerring, and Jessee
## Journal of Experimental Political Science

## NOTE: This code requires the 
## following R packages to be installed 
## (version number used for paper in parentheses):
##     dplyr (1.0.0)
##     tidyr (1.1.0)
##     magrittr (1.5)
##     ggplot2 (3.3.2)
##     ggrepel (0.8.2)
##     forcats (0.5.0)
##     data.table (1.12.8)
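## Users should be sure to install all of these
## packages BEFORE running the code below.
## (data.table is needed only for the optional direct
## download of the dataset, which is commented out below.)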

# Load required packages
library(dplyr)
library(tidyr)
library(magrittr)
library(ggplot2)
library(ggrepel)
library(forcats)

# Load the Barro-Lee dataset from a local CSV copy in the working directory
# (NOTE: downloaded 12/20/21). To read it straight from the source URL
# instead, uncomment the next two lines and drop the read.csv() call.
#bl_url <- "https://barrolee.github.io/BarroLeeDataSet/BLData/BL2013_MF_v2.2.csv"
#dat <- data.table::fread(bl_url)
dat <- read.csv(file = "BL2013_MF_v2.2.csv")

# Collapse the 15-19 and 20-24 age groups into a single 15-24 figure per
# country-year:
#    - keep only the rows whose upper age bound (ageto) is 19 or 24
#    - within each country-year, add up the percent with some tertiary ed
#      (lh) weighted by population (pop), and add up the population itself
#    - divide the weighted sum by the population sum, which gives the
#      population-weighted percent with some tertiary ed (pct_tert)
# The result is an ungrouped, plain data.frame with the columns
# country, year and pct_tert.
dat_sm <- dat %>%
  filter(ageto %in% c(19, 24)) %>%
  group_by(country, year) %>%
  summarize(tert_pop = sum(lh * pop), all_pop = sum(pop)) %>%
  ungroup() %>%
  transmute(country, year, pct_tert = tert_pop / all_pop) %>%
  data.frame()

# Keep the 2010 rows only, ordered from the lowest to the highest percent
# with some tertiary education, and add two helper columns:
#    country_sorted - country as a factor whose levels follow pct_tert
#    country_lab    - "Romania" for Romania, an empty string otherwise
dat_sorted <- dat_sm %>%
  filter(year == 2010) %>%
  arrange(pct_tert) %>%
  mutate(
    country_sorted = fct_reorder(country, pct_tert),
    country_lab = ifelse(country_sorted == "Romania", "Romania", "")
  )

# One point color per row of dat_sorted, in row order:
# black for Romania, light gray for every other country
country_point_cols <- with(
  dat_sorted,
  ifelse(country_sorted == "Romania", "Black", "Light Gray")
)

# Dot plot of the 2010 percent with some tertiary education: one point per
# country, with countries placed along the y axis in order of that percent.
# Romania is the only labeled point (country_lab is empty for all others)
# and the only black one (see country_point_cols).
# The plot is displayed but NOT written to disk here; call ggsave() on p1
# to save it.
p1 <- ggplot(
  dat_sorted,
  aes(x = pct_tert, y = country_sorted, label = country_lab)
) +
  # country_point_cols is a plain vector, not a mapped aesthetic: it must hold
  # one color per row of dat_sorted, in the same row order
  geom_point(color = country_point_cols, size = 2) +
  # ggrepel positions labels with a randomized algorithm; a fixed seed makes
  # the label land in the same place on every run of this replication code
  geom_text_repel(seed = 1234) +
  labs(y = "", x = "Percent of 15-24 year-olds with some tertiary education") +
  # Minimal look: no panel background, black axis lines, and no y-axis
  # text or ticks (individual country names are not shown on the axis)
  theme(panel.background = element_blank(),
        axis.line = element_line(color = "black"),
        axis.title = element_text(size = 16),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
print(p1)
